## Replication file for Moore (2012)
## "Multivariate Continuous Blocking to Improve Political Science Experiments"

library(MASS)
library(blockTools)

####### Figure 1 ######
source("repOptGreedy.R")

## By hand dotplot
outmat <- matrix(c(5.046696, 5.076633, 4.229949, 4.832526, 5.38385, 5.955974, 4.668452, 5.439292, 3.663548, 3.958523, 3.315963, 3.803211, 5.170947, 5.833643, 4.068457, 4.657221), 8,2, byrow=TRUE) 
outmat <- outmat[nrow(outmat):1, ]
colnames(outmat) <- c("optgreedy", "naive")
labs <- c("Mean dist (all), rural", "Median dist (all), rural", "Mean dist (all), urban", "Median dist (all), urban", "Mean dist (best half), rural",  "Median dist (best half), rural",  "Mean dist (best half), urban",  "Median dist (best half), urban")
labs <- labs[length(labs):1]
gr <- c(rep(2,4), rep(1,4))

dotchart(outmat[,"optgreedy"], labels=labs, xlim=c(0,6), groups=gr, pch=19, main="Optimal-greedy versus Naive Algorithm", xlab="Average Mahalanobis Distance between Pairs", ylab="Optimal Criterion\n\n\n\n\n\n\n\n\n")
points(outmat[,"optgreedy"], c(1:4, 7:10), pch=19)
points(outmat[, "naive"], c(1:4, 7:10), pch=5)

rm(covars.rur.match, covars.urb.match, mat, matn, rurdata.mmgosp.comb, rurvc1, urbmmgosj1, urbvc1, storage, outmat, i, gr, labs, o, rurblock, rurblocknaive, urbblock, urbblocknaive)

###### Figure 2 #######
library(MASS)  
library(blockTools)
library(ellipse)

outl <- function(sss, plot.pdf = FALSE){
  set.seed(sss)   
  n <- 20  ## n obs
  id <- 1:n ## id var
  outs <- 2 ## n outliers
  out.fac <- 4  ## blow up SD by out.fac
  k <- 2    ## n vars
  lim <- 100
  mu <- rep(0,k)
  sigma.true <- matrix(c(1,.5,.5,1), k,k)
  x <- data.frame(id, matrix(mvrnorm(n, mu = mu, Sigma = sigma.true), n,
                             k, byrow=T))
  xout <- x
  xout[(n+1):(n+outs), c("X1","X2")] <- mvrnorm(outs, m=mu, Sigma=diag(rep(out.fac, k)))
  xout$id[(n+1):(n+outs)] <- (n+1):(n+outs)
  
  bx <- block(x, id.vars="id", verbose=F)
  bxout <- block(xout, id.vars="id", verbose=F)
  bxmve <- block(x, id.vars="id", distance="mve",verbose=F)
  bxoutmve <- block(xout, id.vars="id", distance="mve",verbose=F)
  
  ## Estimate covariance 3 ways
  sigma.init = cov(x[,c("X1","X2")])
  sigma.rob = cov.rob(xout[,c("X1", "X2")], method = "mve")$cov
  sigma.nonrob = cov(xout[,c("X1", "X2")])
  sigma <- list(sigma.init = sigma.init, sigma.rob = sigma.rob,
                sigma.nonrob = sigma.nonrob)
  main.t <- c("Source Distribution", "Robust Scaling", "Non-Robust Scaling")
  
  xl <- c(-4,4)
  yl <- c(-4,4)
  colvec <- c(rep("black", n+outs))
  
  if(plot.pdf==TRUE){
    pdf("~/Desktop/robell.pdf", height=2.5, width=6)
  }
  x1 <- -4
  x2 <- x1+.55
  y1 <- -.9
  y2 <- y1+.55
  lines.x <- c(x1, x1, x2, x2, x1)
  lines.y <- c(y1, y2, y2, y1, y1)
  x3 <- 1.5
  x4 <- x3+.55
  y3 <- -.55
  y4 <- y3+.55
  lines.x2 <- c(x3, x3, x4, x4, x3)
  lines.y2 <- c(y3, y4, y4, y3, y3)
    
  par(mfrow=c(1,3))
  par(mar=c(5,2,2,0.3))
  par(oma = rep(0, 4))
  par(mgp=c(.25, .25, 0))
  for(i in 1:length(sigma)){
    if(i == 1){ylabl <- "X2"}else{ylabl <- ""}
    
    for(j in c(.5, .95, .99)){
      plot(ellipse(sigma[[i]], level = j, centers=c(0,0)),
           type="l", ylim=yl, xlim=xl, col="grey", main=main.t[i], ylab = ylabl, yaxt= "n", xaxt = "n")
      text(xout$X1, xout$X2, xout$id, cex=.8, col=colvec)
      lines(x = lines.x, y = lines.y, lwd = .3, col = "grey")
      lines(x = lines.x2, y = lines.y2, lwd = .3, col = "grey")
      par(new=T)
    }
    par(new=F)
  }
  if(plot.pdf==TRUE){dev.off()}
  return(list(bx, bxout, bxmve, bxoutmve))
}

l <- outl(126)
## Compare orig blocks to MVE with outliers:
l[[1]]
l[[4]]
## Every pair is same; MVE adds worst block to end.
## Compare orig blocks to MD with outliers:
l[[1]]
l[[2]]
## Only two are same
rm(l, outl)


######### Figures 3 - 5 ##########
library(blockTools)
source("balanceCheck.R")
load("simNUXpvals.RData")

## Figure 3:
boxplot(p ~ trn, data = all.ps$d2.df, horizontal = TRUE, notch = TRUE, names = c("Random 
Allocation", "Complete
                                                                                 Random ", "Blocking"), las = 1, pars = list(boxwex = .6, cex.axis = .85), xlab = bquote(italic(d)^2 ~ italic(p)*"-values, 1000 Replications per Assignment Method"))

## Numerical summary:
aggregate(all.ps$d2.df$p, by = list(all.ps$d2.df$tr), summary)

## Figure 4:
source("plotPSboxplot.R")
ps.boxplot(all.ps, file = "~/Desktop/ks.pdf", wh.test = "ks", boxwxx = .07, varspace = .4, mainlab = "KS")
ps.boxplot(all.ps, file = "~/Desktop/t.pdf", wh.test = "tt", boxwxx = .07, varspace = .4, mainlab = "tt")
ps.boxplot(all.ps, file = "~/Desktop/w.pdf", wh.test = "wc", boxwxx = .07, varspace = .4, mainlab = "wc")

## Figure 5:
all.ps$diff.mean.df$assgn <- 4-as.numeric(all.ps$diff.mean.df$assg)
boxplot(diff.mean ~ assgn, data = all.ps$diff.mean.df, horizontal = TRUE, notch = TRUE, las = 1, names = c("Random 
Allocation", "Complete
Random ", "Blocking"), pars = list(boxwex = .6, cex.axis = .85))

rm(all.ps, ps.boxplot)

###### Figure 6: 
library(foreign)
library(Matching)
library(exactRankTests)
library(coin)
library(RItools)
library(blockTools)
library(weights)
load("perryData.RData")
source("calcBalanceTE.R")
source("balanceCheck.R")
source("plotfuncs.R")

### Perry covariates ####
covs.perry.approx <- c("p018", "isMale", "binet3rank")  ## SES, sex, IQ rank

###### Total arrests, diff means estimate:
out.perry.approx.totarrests <- calcBalanceTE(data = x, actual.tr = "preschool", vars.to.bal = covs.perry.approx, var.type = c("cont", "disc", "cont"), outcomes = "totalarr", id.vars = "ID", te.bl = "diff.means", te.complete = "diff.means", te.classical = "diff.means", seed = 754, n.rands = 100, block.vars = covs.perry.approx)

out.perry.approx.totarrests$bal.actual
summary(out.perry.approx.totarrests$bal.bl)

plot.bal.qq(out.perry.approx.totarrests, color = FALSE)

## better blocked estimates:
mea.co <- mean(out.perry.approx.totarrests$te.comp)
mea.cl <- mean(out.perry.approx.totarrests$te.clas)
mea.bl <- mean(out.perry.approx.totarrests$te.bl)
mea.bl
med.co <- median(out.perry.approx.totarrests$te.comp)
med.cl <- median(out.perry.approx.totarrests$te.clas)
med.bl <- median(out.perry.approx.totarrests$te.bl)
med.bl
sort(abs(c(mea.cl, mea.co, med.co, med.cl)))
sd.co <- sd(out.perry.approx.totarrests$te.comp)
sd.cl <- sd(out.perry.approx.totarrests$te.clas)
sd.bl <- sd(out.perry.approx.totarrests$te.bl)
1-abs(mea.bl)/abs(mea.cl) 
1-abs(mea.bl)/abs(mea.co)
1-abs(med.bl)/abs(med.co)
1-abs(med.bl)/abs(med.cl)
1- (sd.bl/sd.co)
1- (sd.bl/sd.cl)
rm(mea.co, mea.cl, mea.bl, med.co, med.cl, med.bl, sd.co, sd.cl, sd.bl)

###### Total arrests, regression estimate:
out.perry.approx.totarrests.reg <- calcBalanceTE(data = x, actual.tr = "preschool", vars.to.bal = covs.perry.approx, var.type = c("cont", "disc", "cont"), outcomes = "totalarr", id.vars = "ID", te.bl = "regcoef", te.complete = "regcoef", te.classical = "regcoef", seed = 754, n.rands = 100, block.vars = covs.perry.approx)

## better blocked estimates:
mea.co <- mean(out.perry.approx.totarrests.reg$te.comp)
mea.cl <- mean(out.perry.approx.totarrests.reg$te.clas)
mea.bl <- mean(out.perry.approx.totarrests.reg$te.bl)
med.co <- median(out.perry.approx.totarrests.reg$te.comp)
med.cl <- median(out.perry.approx.totarrests.reg$te.clas)
med.bl <- median(out.perry.approx.totarrests.reg$te.bl)
sd.co <- sd(out.perry.approx.totarrests.reg$te.comp)
sd.cl <- sd(out.perry.approx.totarrests.reg$te.clas)
sd.bl <- sd(out.perry.approx.totarrests.reg$te.bl)
1-abs(mea.bl)/abs(mea.cl)
1-abs(mea.bl)/abs(mea.co)
1-abs(med.bl)/abs(med.co)
1-abs(med.bl)/abs(med.cl) 
1-(sd.bl/sd.co)
1-(sd.bl/sd.cl)

plot.te(out.perry.approx.totarrests.reg, x.lims=c(-5,5))

mea.bl <- mean(out.perry.approx.totarrests$te.bl)
med.bl <- median(out.perry.approx.totarrests$te.bl)
sd.bl <- sd(out.perry.approx.totarrests$te.bl)
1-abs(mea.bl)/abs(mea.cl)
1-abs(mea.bl)/abs(mea.co)
1-abs(med.bl)/abs(med.co)
1-abs(med.bl)/abs(med.cl) 
1-(sd.bl/sd.co)
1-(sd.bl/sd.cl)
rm(mea.co, mea.cl, mea.bl, med.co, med.cl, med.bl, sd.bl, sd.co, sd.cl)

### FULL ####
covs.perry.all <- c("p013", "p018", "onWelfare", "momEmployed", "dadSkill", "p024", "isMale", "binet3", "dadHome", "medu")
load("perry100allArrests.RData")

out.perry.all.arrests$bal.actual
summary(out.perry.all.arrests$bal.bl)
plot.bal.qq(out.perry.all.arrests, color = FALSE)

load("perry100allArrestsReg.RData")
plot.te(out.perry.all.arrests.reg, x.lims=c(-6,6))

mea.co <- mean(out.perry.all.arrests.reg$te.comp)
mea.cl <- mean(out.perry.all.arrests.reg$te.clas)
mea.bl <- mean(out.perry.all.arrests.reg$te.bl)
sd.co <- sd(out.perry.all.arrests.reg$te.comp)
sd.cl <- sd(out.perry.all.arrests.reg$te.clas)
sd.bl <- sd(out.perry.all.arrests.reg$te.bl)
1-abs(mea.bl)/abs(mea.cl)
1-abs(mea.bl)/abs(mea.co)
1-(sd.bl/sd.co)
1-(sd.bl/sd.cl)
rm(mea.co, mea.cl, mea.bl, sd.bl, sd.co, sd.cl)

rm(x, covs.perry.all, covs.perry.approx, out.perry.all.arrests, out.perry.all.arrests.reg, out.perry.approx.totarrests, out.perry.approx.totarrests.reg)
####### End Fig 6  #########

### FIGURE 7 ########
library(foreign)
library(Matching)
library(exactRankTests)
library(coin)
library(RItools)
library(blockTools)
library(weights)
source("loadLRData.R")
source("calcBalanceTE.R")
source("balanceCheck.R")
source("plotfuncs.R")

## covs: province of residence, pledged support, represented group
covs.loerub.small <- names(x.resp)[61:79]
covs.loerub.small <- covs.loerub.small[-which(covs.loerub.small %in% c("NB", "Undeclared"))]

## Only small covs:
out.lr <- calcBalanceTE(data = x.resp, actual.tr = "isMailed", vars.to.bal = covs.loerub.small, var.type = rep("disc", length(covs.loerub.small)), outcomes = "IgRank", id.vars = "respondent_id", te.bl = "diff.means", te.complete = "diff.means", te.classical = "diff.means", seed = 63, n.rands = 100, block.vars = covs.loerub.small)

out.lr$bal.actual ## 0.444
balance.check(x, covs.loerub.small, tr.var = "isMailed", vars.type = rep("disc", 17), graph.qq = FALSE, graph.pvals = FALSE)$d2.p 
## .754

summary(out.lr$bal.bl)
plot.bal.qq(out.lr, color = FALSE)

## Only small covs, regression:
out.lr.reg <- calcBalanceTE(data = x.resp, actual.tr = "isMailed", vars.to.bal = covs.loerub.small, var.type = rep("disc", length(covs.loerub.small)), outcomes = "IgRank", id.vars = "respondent_id", te.bl = "regcoef", te.complete = "regcoef", te.classical = "regcoef", seed = 63, n.rands = 100, block.vars = covs.loerub.small)
load("lr100reg.RData")
plot.te(out.lr.reg)

## post measures:
post.loerub <- c("attention", "interest")
covs.loerub.big <- c(covs.loerub.small, post.loerub)
load("lrBig100.RData")
plot.bal.qq(out.lr.big, color = FALSE)
out.lr.big$bal.actual
summary(out.lr.big$bal.bl)

load("lrBig100Reg.RData")
out.lr.big.reg$bal.actual
summary(out.lr.big.reg$bal.bl)
plot.te(out.lr.big.reg, x.lims=c(-.6,.6))

(161-123)/123

sd.co <- sd(out.lr$te.comp)
sd.cl <- sd(out.lr$te.clas)
sd.bl <- sd(out.lr$te.bl)
r.bl <- max(out.lr$te.bl)-min(out.lr$te.bl)
r.co <- max(out.lr$te.comp)-min(out.lr$te.comp)
r.cl <- max(out.lr$te.clas)-min(out.lr$te.clas)
1-(sd.bl/sd.co) ## .1758
1-(sd.bl/sd.cl) ## .029
1-(r.bl/r.co) #.25
1-(r.bl/r.cl) #.15

sd.co <- sd(out.lr.reg$te.comp)
sd.cl <- sd(out.lr.reg$te.clas)
sd.bl <- sd(out.lr.reg$te.bl)
1-(sd.bl/sd.co)
1-(sd.bl/sd.cl)
r.bl <- max(out.lr.reg$te.bl)-min(out.lr.reg$te.bl)
r.co <- max(out.lr.reg$te.comp)-min(out.lr.reg$te.comp)
r.cl <- max(out.lr.reg$te.clas)-min(out.lr.reg$te.clas)
1-(r.bl/r.co) #.31
1-(r.bl/r.cl) #.08

sd.bl <- sd(out.lr$te.bl)
r.bl <- max(out.lr$te.bl)-min(out.lr$te.bl)
1-(sd.bl/sd.co)
1-(sd.bl/sd.cl)
1-(r.bl/r.co) 
1-(r.bl/r.cl) 

mea.co <- mean(out.lr.big$te.comp)
mea.cl <- mean(out.lr.big$te.clas)
mea.bl <- mean(out.lr.big$te.bl)
sd.co <- sd(out.lr.big$te.comp)
sd.cl <- sd(out.lr.big$te.clas)
sd.bl <- sd(out.lr.big$te.bl)
1-abs(mea.bl)/abs(mea.cl)
1-abs(mea.bl)/abs(mea.co)
1-(sd.bl/sd.co)
1-(sd.bl/sd.cl)

mea.co <- mean(out.lr.big.reg$te.comp)
mea.cl <- mean(out.lr.big.reg$te.clas)
mea.bl <- mean(out.lr.big.reg$te.bl)
sd.co <- sd(out.lr.big.reg$te.comp)
sd.cl <- sd(out.lr.big.reg$te.clas)
sd.bl <- sd(out.lr.big.reg$te.bl)
1-abs(mea.bl)/abs(mea.cl)
1-abs(mea.bl)/abs(mea.co)
1-(sd.bl/sd.co) # .179
1-(sd.bl/sd.cl) # .238

rm(mea.co, mea.cl, mea.bl, sd.bl, sd.co, sd.cl, r.bl, r.co, r.cl)
rm(x, x.resp)
rm(covs.loerub.small, covs.loerub.big, post.loerub)
rm(out.lr, out.lr.big)
rm(out.lr.big.reg, out.lr.reg)
###### END FIGURE 7 #########

##### Figure 8 #######
library(foreign)
library(Matching)
library(exactRankTests)
library(coin)
library(RItools)
library(blockTools)
library(weights)
source("calcBalanceTE.R")
source("balanceCheck.R")
source("plotfuncs.R")

library(gdata)
x <- read.xls("pangre08.xlsx", header = TRUE)
names(x) <- c("city", "state", "CPPstrata", "priorTurnout", "partisan", "priorIncumbVoteShare", "RadioBuyGRPs", "year", "isStatewideElec", "incumbVoteShare", "citation1", "citation2", "citation3")
x$y.pangre <- 100*x$incumbVoteShare-x$priorIncumbVoteShare
x$is2006 <- x$isPartisan <- x$isModCost <- x$isHighCost <- 0
x$is2006[x$year == 2006] <- 1
x$isPartisan[x$partisan == "Y "] <- 1
x$isModCost[x$CPPstrata == "MODERATE "] <- 1
x$isHighCost[x$CPPstrata == "HIGH "] <- 1
x$cppYearStrata <- paste(x$CPPstrata, x$year, sep="")
covs.pangre.exact <- "cppYearStrata"
covs.pangre.mah <- c("priorTurnout", "isPartisan", "isStatewideElec")
## dummify strata
x <- cbind(x, dummify(as.factor(x$cppYearStrata)))
x[, "MOD 2005"] <- x[, "MODERATE 2005"] 
x[, "MODERATE 2005"] <- NULL
x[, "MOD 2006"] <- x[, "MODERATE 2006"] 
x[, "MODERATE 2006"] <- NULL
## remove cppYear stratum singleton, unit 40:
x.tmp <- x[-40,]

load("pangre100.RData")

for(i in 1:3){ 
  out.pangre[[i]] <- out.pangre[[i]][, 2:ncol(out.pangre[[i]])]
}
out.pangre[[4]] <- out.pangre[[4]][2:length(out.pangre[[4]])]
out.pangre$bal.actual

describe(out.pangre$bal.bl)
describe(out.pangre$bal.comp)
describe(out.pangre$bal.clas)

pl.rows <- ceiling(sqrt(ncol(out.pangre[[1]])))
par(mfrow=c(pl.rows, ceiling(ncol(out.pangre[[1]])/pl.rows)))
for(i in 1:ncol(out.pangre[[1]])){
  this.var <- colnames(out.pangre[[1]])[i]
  qqplot(out.pangre$bal.comp[, i], out.pangre$bal.bl[, i], xlim=c(0,1), ylim=c(0,1), xlab = bquote(Unblocked ~ italic(t) ~ "p-values," ~ .(this.var)), ylab = bquote(Blocked ~ italic(t) ~ "p-values, " ~ .(this.var)))
  par(new=TRUE)
  qqplot(out.pangre$bal.clas[, i], out.pangre$bal.bl[, i], xlim=c(0,1), ylim=c(0,1), xlab = "", ylab = "", axes = FALSE, pch=19)
  abline(0,1, col="black", lty = 2)
  abline(out.pangre$bal.actual[i], 0, col="black", lty=3)
}
